Data belum lengkap; untuk sementara pakai dataset 20 Newsgroups.
In [4]:
import numpy as np
from optparse import OptionParser
import sys
from time import time
import matplotlib.pyplot as plt
Paket komplit, lengkap pakai telor, jos!
In [8]:
# Placeholder dataset (20 Newsgroups) until the real data is complete.
# NOTE: the module is `sklearn.datasets` (plural); `sklearn.dataset` does not
# exist and raises ModuleNotFoundError.
from sklearn.datasets import fetch_20newsgroups
# Preprocessing / text vectorizers. TODO: add PCA.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import HashingVectorizer
# Optimization via pipelines. TODO: add grid search.
from sklearn.pipeline import Pipeline
# Classifiers: for now only a linear SVM and SGD-trained linear models.
from sklearn.svm import LinearSVC
from sklearn.linear_model import SGDClassifier
In [3]:
from pprint import pprint

# Pretty-print the category labels of the training set.
# NOTE(review): `newsgroups_train` is not defined in the visible cells;
# presumably it is loaded in another cell -- confirm it is run first.
category_names = list(newsgroups_train.target_names)
pprint(category_names)